#include <fstream>
#include <sstream>
#include <iostream>
#include <time.h>
#include <algorithm>
#include <iterator>
#include <iomanip>
#include <limits>
#include <opencv2/core/utility.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/core/ocl.hpp>
#include <opencv2/video.hpp>
#include "Yolo_lib.h"

// Command-line option table handed to cv::CommandLineParser in main().
// Every entry follows the parser's "{names | default value | help text}" layout:
//   help / h / usage / ?  - prints the usage text
//   input / i             - path of the image or video to process (no default)
//   confidence / c        - confidence threshold, defaults to .5
//   threshold / t         - non-maximum suppression threshold, defaults to .4
// NOTE: the text inside the literals (including its whitespace) is emitted
// verbatim by printMessage(), so it is part of the program's output.
const char* keys =
"{help h usage ? | | Usage examples: \n\t\t Yolo.exe -i=image.jpg (or -i=video.avi)	}"
"{input i        |<none>| input image or video				}"
"{confidence c  | .5 | Confidence threshold					}"
"{threshold t   | .4 | Non-maximum suppression threshold	}"
;

using namespace cv;
using namespace dnn;
using namespace std;

// Initialize the parameters
float conf_threshold; // Confidence threshold
float nms_threshold;  // Non-maximum suppression threshold
int inpWidth = 320;  // Width of network's input image
int inpHeight = 320; // Height of network's input image
vector<std::string> classes;
vector<cv::Scalar> colors;

int main(int argc, char** argv)
{
	CommandLineParser parser(argc, argv, keys);
	parser.about("Use this script to run object detection using YOLO3 in OpenCV.");

	if (argc == 1) {
		std::cout << "Wrong argument, press -h for more information..." << endl;
		return -1;
	}
	if (parser.has("help"))
	{
		std::cout << "help message:" << endl;
		parser.printMessage();
		return 0;
	}
	
	conf_threshold = parser.get<float>("confidence");
	nms_threshold = parser.get<float>("threshold");

	if (!parser.check())
	{
		parser.printErrors();
		return 1;
	}

	std::cout << "The Arguments inserted are: \n";
	//I'm starting from i-1 since i-0 is the program itself
	for (int i = 1; i < argc; i++) {
		std::cout << "argv[" << i << "]: " << argv[i] << endl;
	}
	std::cout << endl;

	// Load names of classes
	std::string classesFile = "coco.names";

	CV_Assert(!classesFile.empty());
	// read file
	std::ifstream ifs(classesFile.c_str());
	if (!ifs.is_open()) {
		CV_Error(Error::StsError, "File " + classesFile + " not found");
	}

	std::string line;
	// put in classes all the lines from coco.names file
	while (std::getline(ifs, line))
	{
		classes.push_back(line);
	}

	if (!classes.empty()) {

		// print_vector(classes);

		RNG rng(43);
		for (int i = 0; i < classes.size(); ++i) {
			Scalar color = Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255));
			colors.push_back(color);
		}
		CV_Assert(colors.size() == classes.size());
		if (colors.size() != classes.size()) {
			std::cout << "Error..." << endl;
			return -1;
		}
	}

	// Give the configuration and weight files for the model
	String modelConfiguration = "yolov3.cfg";
	String modelWeights = "yolov3.weights";

	CV_Assert(!modelConfiguration.empty());
	CV_Assert(!modelWeights.empty());

	// Load the network
	std::cout << "Loading YOLO from disk...\n";
	Net net = readNetFromDarknet(modelConfiguration, modelWeights);
	net.setPreferableBackend(DNN_BACKEND_OPENCV);
	net.setPreferableTarget(DNN_TARGET_CPU);

	vector<String> ln;

	ln = getOutputsNames(net);

	string ext;

	if (parser.has("input")) {
		const std::string path = parser.get<String>("input");
		std::cout << "path is: " << path << std::endl;
		ext = getFileExt(path);

		std::cout << "The extension is \"" << ext << "\"\n";
	}

	if (ext == "jpg" || ext == "png") {

		// Open a image file
		string outputFile;
		int H, W;
		Mat frame, blob;
		outputFile = "yolo_out.jpg";

		frame = imread(parser.get<String>("input"));

		if (frame.empty())     // Check for invalid input
		{
			std::cout << "Could not open input image, not found..." << std::endl;
			return -1;
		}

		W = frame.size().width;
		H = frame.size().height;
		std::cout << "Width x Height: " << W << " x " << H << endl;

		// Create a 4D blob from a frame.
		cv::dnn::blobFromImage(frame, blob, 1 / 255.0, Size(inpWidth, inpHeight), Scalar(0, 0, 0), true, false);

		//Sets the input to the network
		net.setInput(blob);

		// Runs the forward pass to get output of the output layers
		vector<Mat> outs;

		net.forward(outs, ln);

		// Remove the bounding boxes with low confidence
		post_process(frame, outs, conf_threshold, nms_threshold, classes, colors);

		// Put efficiency information.
		std::vector<double> layersTimes;
		double freq = getTickFrequency();
		double t = net.getPerfProfile(layersTimes) / freq;
		std::string label = format("Inference time: %.2f s", t);
		std::cout << "Time: " << label << endl;

		// save the file
		cv::imwrite(outputFile, frame);

		// Create a window
		static const string window_name = "Deep learning object detection in OpenCV";
		cv::namedWindow(window_name, cv::WINDOW_AUTOSIZE);
		cv::imshow(window_name, frame);
		cv::waitKey(0);
		cv::destroyAllWindows();

		return 0;
	}
	else if (ext == "avi" || ext == "mp4") {

		// Open a video file 
		VideoCapture cap;
		string outputFile = "yolo_out.avi";
		Mat frame, blob;
		bool flag = true;

		if (cap.open(parser.get<String>("input")) == true) {
			std::cout << "File video correctly opened!" << endl;
		}
		else {
			std::cout << "Cannot open the video file...Exiting..." << endl;
			cin.get(); //wait for any key press
			return -1;
		}

		int frame_width = static_cast<int>(cap.get(CAP_PROP_FRAME_WIDTH)); //get the width of frames of the video
		int frame_height = static_cast<int>(cap.get(CAP_PROP_FRAME_HEIGHT)); //get the height of frames of the video
		int frame_count = static_cast<int>(cap.get(CAP_PROP_FRAME_COUNT));
		float frame_fps = static_cast<float>(cap.get(CAP_PROP_FPS));
		float duration = frame_count / frame_fps;

		std::cout << "Width x Height: " << frame_width << " x " << frame_height << endl;
		std::cout << "Total number of frame detected: " << frame_count << endl;
		std::cout << "Total fps: " << frame_fps << endl;
		std::cout << "Video Duration: " << std::fixed << std::setprecision(3) << duration << " seconds" << std::endl;

		Size frame_size(frame_width, frame_height);
		int frames_per_second = 20;
		int frames = 0;
		//Create and initialize the VideoWriter object 
		VideoWriter video(outputFile, VideoWriter::fourcc('M', 'J', 'P', 'G'), frames_per_second, frame_size, true);

		//If the VideoWriter object is not initialized successfully, exit the program
		if (video.isOpened() == false)
		{
			std::cout << "Cannot save the video to a file" << endl;
			cin.get(); //wait for any key press
			return -1;
		}

		while (true)
		{
			bool isSuccess = cap.read(frame); // read a new frame from the video file

			//Breaking the while loop if frames cannot be read from the file
			if (isSuccess == false)
			{
				std::cout << "Video ended up" << endl;
				break;
			}

			frames += 1;

			// Create a 4D blob from a frame.
			cv::dnn::blobFromImage(frame, blob, 1 / 255.0, Size(inpWidth, inpHeight), Scalar(0, 0, 0), true, false);

			//Sets the input to the network
			net.setInput(blob);

			// Runs the forward pass to get output of the output layers
			vector<Mat> outs;

			net.forward(outs, ln);

			// Remove the bounding boxes with low confidence
			post_process(frame, outs, conf_threshold, nms_threshold, classes, colors);

			if (flag == true) {
				flag = false;
				std::vector<double> layersTimes;
				double freq = getTickFrequency();
				double t = net.getPerfProfile(layersTimes) / freq;

				std::cout << "Single frame took: " << t << " seconds" << endl;
				std::cout << "Estimated total time to finish: " << t * frame_count << " seconds" << endl;
			}
			std::cout << "Processing Frame n: " << frames << endl;
			//write the video frame to the file
			video.write(frame);

			if (cv::waitKey(10) == 27)
			{
				std::cout << "Esc key is pressed by the user. Stopping the video" << endl;
				break;
			}
		}

		//Flush and close the video file
		video.release();

		std::cout << "Total number of processed frames: " << frames << endl;
		std::cout << "Output saved in: " << outputFile << endl;
		std::cout << "Exiting..." << endl;

		return 0;
	}


	else {
		std::cout << "Format extension not supported! Exit...\n";
		system("pause");
		return -1;

	}
	return 0;
}